#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import csv
import time

from scrapy.selector import Selector

# Log when the run starts; asctime() with no argument formats the current
# local time.
print('start:', time.asctime())
def show_money(money):
    """Convert a salary-range string to thousands per month.

    Only the lower bound of the range (the text before the first ``-``)
    is used.  The unit found in the string decides the scaling:

    * ``万/月`` (ten-thousands per month): multiplied by 10
    * ``千/月`` (thousands per month): taken as is
    * ``万/年`` (ten-thousands per year): multiplied by 10, divided by 12
    * anything else: divided by 1000 (presumably a plain monthly amount;
      verify against the data)

    Returns the result as a float rounded to two decimals.  Raises
    ``ValueError`` when the text before the first ``-`` is not a number.
    """
    lower_bound = float(money.partition('-')[0])
    if '万/月' in money:
        converted = lower_bound * 10
    elif '千/月' in money:
        converted = lower_bound
    elif '万/年' in money:
        converted = lower_bound * 10 / 12
    else:
        converted = lower_bound / 1000
    return round(converted, 2)
# Build ./data/list.csv from the saved listing pages ./data/list-1.html ..
# ./data/list-50.html: one row per posting with the job title, company,
# city and the salary as normalised by show_money().
#
# NOTE(review): every file is opened with the platform default encoding,
# exactly as before; confirm that matches how the pages were saved.
with open('./data/list.csv', 'w', newline='') as f:
    # csv.writer quotes any field containing a comma or quote, so such a
    # title or company name can no longer shift the columns.  '\n' matches
    # the terminator the rows were built with previously.
    writer = csv.writer(f, lineterminator='\n')
    writer.writerow(['职位', '公司', '城市', '薪资'])
    for page in range(1, 51):
        filename = './data/list-%s.html' % page
        # Close each page as soon as its text has been read.
        with open(filename) as page_file:
            html = page_file.read()
        s = Selector(text=html)
        for div in s.xpath('//div[@class="el"]'):
            listt = div.xpath('.//p[contains(@class, "t1")]//a//text()').extract()
            listc = div.xpath('.//span[re:test(@class, "^t2$")]//text()').extract()
            lista = div.xpath('.//span[re:test(@class, "^t3$")]//text()').extract()
            listm = div.xpath('.//span[re:test(@class, "^t4$")]//text()').extract()
            # Skip entries missing any column instead of failing with an
            # IndexError on the [0] lookups below.
            if not (listt and listc and lista and listm):
                continue
            money = listm[0]
            # show_money() parses the text before the first '-', so only
            # salaries given as a range are kept.
            if '-' not in money:
                continue
            writer.writerow([
                listt[0].strip(),
                listc[0],
                # Keep only the part of the location before the first '-'.
                lista[0].split('-', 1)[0],
                show_money(money),
            ])
print('end:', time.asctime())